Taken from Steven Turner's Template for analysis with DESeq2 and slightly modified for my usecase/jupyter format.

Install DESeq2 if needed


In [ ]:
source("https://bioconductor.org/biocLite.R")
biocLite("DESeq2")

In [ ]:
library(DESeq2)

In [ ]:
install.packages("calibrate")

In [ ]:
install.packages("gplots")

Read in count data and set up the matrix for DESeq

In this case data.csv is assumed to be produced by htseq-count or similar.


In [ ]:
countdata = read.csv('data.csv', header=TRUE, row.names=1)
countdata = as.matrix(countdata)
head(countdata)

Experimental conditions and number of replicates


In [ ]:
(condition = factor(c(rep("control",3), rep("treatment",3))))

In [ ]:
(coldata = data.frame(row.names=colnames(countdata), condition))

Create DESeq object and fit the model


In [ ]:
dds = DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds

In [ ]:
dds = DESeq(dds)

Plot dispersions


In [ ]:
plotDispEsts(dds, main="Dispersion plot")

Regularized log transform for heatmaps/clustering plots


In [ ]:
rld = rlogTransformation(dds)

In [ ]:
head(assay(rld))

In [ ]:
hist(assay(rld))

In [ ]:
library(RColorBrewer)

In [ ]:
(mycols = brewer.pal(8, "Dark2")[1:length(unique(condition))])

In [ ]:
sampleDists = as.matrix(dist(t(assay(rld))))

In [ ]:
library(gplots)

In [ ]:
heatmap(as.matrix(sampleDists), key=F, trace="none",
       col=colorpanel(100, "black", "white"),
       ColSideColors=mycols[condition], RowSideColor=mycols[condition],
       margin=c(10,10), main="Sample Distance Matrix")

Principal component analysis


In [ ]:
rld_pca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) {
  require(genefilter)
  require(calibrate)
  require(RColorBrewer)
  rv = rowVars(assay(rld))
  select = order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca = prcomp(t(assay(rld)[select, ]))
  fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : "))
  if (is.null(colors)) {
    if (nlevels(fac) >= 3) {
      colors = brewer.pal(nlevels(fac), "Paired")
    }   else {
      colors = c("black", "red")
    }
  }
  pc1var <- round(summary(pca)$importance[2,1]*100, digits=1)
  pc2var <- round(summary(pca)$importance[2,2]*100, digits=1)
  pc1lab <- paste0("PC1 (",as.character(pc1var),"%)")
  pc2lab <- paste0("PC1 (",as.character(pc2var),"%)")
  plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...)
  with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx))
  legend(legendpos, legend=levels(fac), col=colors, pch=20)
  #     rldyplot(PC2 ~ PC1, groups = fac, data = as.data.frame(pca$rld),
  #            pch = 16, cerld = 2, aspect = "iso", col = colours, main = draw.key(key = list(rect = list(col = colours),
  #                                                                                         terldt = list(levels(fac)), rep = FALSE)))
}

In [ ]:
rld_pca(rld, colors=mycols, intgroup="condition")

Get the differential expression results


In [ ]:
res = results(dds)

In [ ]:
table(res$padj<0.05)

In [ ]:
res = res[order(res$padj),]
resdata = merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] = "Gene"
head(resdata)

In [ ]:
write.csv(resdata, file="../results/d37bcm.deseq.csv")

In [ ]:
hist(res$pvalue, breaks=50, col="grey")

MA plot


In [ ]:
maplot <- function (res, thresh=0.05, labelsig=TRUE, textcx=1, ...) {
  with(res, plot(baseMean, log2FoldChange, pch=20, cex=.5, log="x", ...))
  with(subset(res, padj<thresh), points(baseMean, log2FoldChange, col="red", pch=20, cex=1.5))
  if (labelsig) {
    require(calibrate)
    with(subset(res, padj<thresh), textxy(baseMean, log2FoldChange, labs=Gene, cex=textcx, col=2))
  }
}

In [ ]:
maplot(resdata, main="MA Plot")

Volcano plot


In [ ]:
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
  with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main=main, ...))
  with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(pvalue), pch=20, col="red", ...))
  with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="orange", ...))
  with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="green", ...))
  if (labelsig) {
    require(calibrate)
    with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=textcx, ...))
  }
  legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "both"), pch=20, col=c("red","orange","green"))
}

In [ ]:
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8)

In [ ]: